In [5]:
from numpy import *

# y = mx + b
# m is the slope, b is the y-intercept.
# compute_error_for_line_given_points calculates the mean squared error (MSE)
# using the equation we have seen: MSE = (1/N) * sum((y_i - (m * x_i + b))^2).
def compute_error_for_line_given_points(b, m, points):
    totalError = 0
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        totalError += (y - (m * x + b)) ** 2
    return totalError / float(len(points))
def step_gradient(b_current, m_current, points, learningRate):
    b_gradient = 0
    m_gradient = 0
    N = float(len(points))
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        # Here we code up our partial-derivative equations and accumulate
        # the gradients that move b and m toward the local minimum.
        b_gradient += -(2/N) * (y - ((m_current * x) + b_current))
        m_gradient += -(2/N) * x * (y - ((m_current * x) + b_current))
    # We multiply b_gradient and m_gradient by the learning rate, so choosing
    # a sensible learning rate is important: if it is too high, the updates
    # overshoot the minimum and the model may never converge; if it is too low,
    # training becomes very slow. The learning rate is therefore an important
    # hyperparameter.
    new_b = b_current - (learningRate * b_gradient)
    new_m = m_current - (learningRate * m_gradient)
    return [new_b, new_m]
def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):
    b = starting_b
    m = starting_m
    for i in range(num_iterations):
        # step_gradient evaluates the partial derivatives of the error function
        # and returns the updated b and m for this iteration.
        b, m = step_gradient(b, m, array(points), learning_rate)
    return [b, m]
def run():
    # Step 1: read the data.
    # genfromtxt reads our data from the data1.csv file.
    points = genfromtxt("../data/data1.csv", delimiter=",")
    # Step 2: define the hyperparameters.
    # The learning rate controls how fast our model converges,
    # i.e. how quickly we reach the line of best fit.
    learning_rate = 0.0001
    # We want to draw the line that best fits our data, so we use
    # y = mx + b (x and y are the points; m is the slope; b is the y-intercept).
    # Initial y-intercept guess:
    initial_b = 0
    # Initial slope guess:
    initial_m = 0
    # How long do we want to train the model? The dataset is small,
    # so we iterate 1000 times.
    num_iterations = 1000
    # Step 3: print the values of b, m, and the error before and after
    # running gradient descent. compute_error_for_line_given_points()
    # computes the MSE for the given points.
    print("Starting gradient descent at b = {0}, m = {1}, MSE = {2}".format(initial_b, initial_m, compute_error_for_line_given_points(initial_b, initial_m, points)))
    print("Running...")
    # gradient_descent_runner() performs the actual gradient descent.
    [b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)
    # Print b, m, and the error for the resulting line of best fit.
    print("After {0} iterations b = {1}, m = {2}, MSE = {3}".format(num_iterations, b, m, compute_error_for_line_given_points(b, m, points)))

if __name__ == '__main__':
    run()
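For reference, the update rule implemented in step_gradient comes from differentiating the MSE with respect to b and m (with \alpha denoting the learning rate):

\text{MSE}(b, m) = \frac{1}{N} \sum_{i=1}^{N} \big( y_i - (m x_i + b) \big)^2

\frac{\partial \text{MSE}}{\partial b} = -\frac{2}{N} \sum_{i=1}^{N} \big( y_i - (m x_i + b) \big), \qquad \frac{\partial \text{MSE}}{\partial m} = -\frac{2}{N} \sum_{i=1}^{N} x_i \big( y_i - (m x_i + b) \big)

b \leftarrow b - \alpha \frac{\partial \text{MSE}}{\partial b}, \qquad m \leftarrow m - \alpha \frac{\partial \text{MSE}}{\partial m}

The same step can also be written without the Python loop. A minimal vectorized sketch (not part of the original notebook; it assumes points is an (N, 2) NumPy array, as produced by genfromtxt above):
In [ ]:
import numpy as np

def step_gradient_vectorized(b, m, points, learning_rate):
    # points[:, 0] holds the x values, points[:, 1] the y values.
    x, y = points[:, 0], points[:, 1]
    error = y - (m * x + b)
    # -(2/N) * sum(...) is the same as -2 * mean(...).
    b_gradient = -2 * error.mean()
    m_gradient = -2 * (x * error).mean()
    return b - learning_rate * b_gradient, m - learning_rate * m_gradient

# Example: one step on a tiny synthetic dataset.
demo = np.array([[1.0, 2.0], [2.0, 4.1], [3.0, 5.9]])
print(step_gradient_vectorized(0.0, 0.0, demo, 0.0001))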
In [ ]:
In [4]:
from sklearn.linear_model import LinearRegression
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [5]:
data = pd.read_csv("../data/data1.csv", header=None)
In [6]:
X = data.iloc[:, 0]
print(X.shape)
# scikit-learn expects a 2-D feature matrix, so reshape (N,) into (N, 1).
X = X.values.reshape(-1, 1)
print(X.shape)
y = data.iloc[:, 1]
print(y.shape)
In [7]:
# Fit ordinary least squares and predict on the same points to get the fitted line.
lr2 = LinearRegression()
lr2.fit(X, y)
y2 = lr2.predict(X)
In [8]:
from sklearn.metrics import mean_squared_error
print('MSE = ', mean_squared_error(y, y2))
In [11]:
plt.plot(X, y, '.')
plt.plot(X, y2, '-')
Out[11]: [plot: the data points (dots) with the fitted regression line]
In [8]:
# your turn: fit stochastic gradient descent using scikit-learn!
# - use SGDRegressor
# - measure performance
# - validate the results
# (a minimal sketch follows below)
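A minimal sketch of one way to approach this (a suggestion, not the notebook's official solution). It reuses X, y, and plt from the cells above; since SGD is sensitive to feature scale, the input is standardized first:
In [ ]:
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# SGD is sensitive to feature scale, so standardize X first.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

sgd = SGDRegressor(max_iter=1000, tol=1e-3, random_state=42)
sgd.fit(X_scaled, y)
y_sgd = sgd.predict(X_scaled)

# Validate: compare the MSE against the closed-form LinearRegression fit above.
print('SGD MSE = ', mean_squared_error(y, y_sgd))
plt.plot(X, y, '.')
plt.plot(X, y_sgd, '-')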
In [ ]:
In [2]:
from sklearn.linear_model import LinearRegression, SGDRegressor
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell requires an older scikit-learn version.
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
boston = load_boston()
In [3]:
# describe the dataset
print(boston.DESCR)
In [7]:
# your turn!
# (a minimal sketch follows below)
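A minimal sketch of one possible approach (a suggestion, not the notebook's official solution): fit SGDRegressor on standardized features and validate on a held-out split, using the boston data and train_test_split already loaded above.
In [ ]:
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# Hold out 20% of the data to validate the results.
X_train, X_test, y_train, y_test = train_test_split(
    boston.data, boston.target, test_size=0.2, random_state=42)

# Standardize the features; SGD converges poorly on unscaled data.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

sgd = SGDRegressor(max_iter=1000, tol=1e-3, random_state=42)
sgd.fit(X_train_s, y_train)

print('train MSE = ', mean_squared_error(y_train, sgd.predict(X_train_s)))
print('test MSE = ', mean_squared_error(y_test, sgd.predict(X_test_s)))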